In [5]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go


# Read the batting records from the CSV file and preview the first rows.
csv_path = "Virat_Kohli.csv"
data = pd.read_csv(csv_path)
print(data.head())
   Runs  BF  4s  6s     SR  Pos Dismissal  Inns   Opposition         Ground  \
0    12  22   1   0  54.54  2.0       lbw     1  v Sri Lanka       Dambulla   
1    37  67   6   0  55.22  2.0    caught     2  v Sri Lanka       Dambulla   
2    25  38   4   0  65.78  1.0   run out     1  v Sri Lanka  Colombo (RPS)   
3    54  66   7   0  81.81  1.0    bowled     1  v Sri Lanka  Colombo (RPS)   
4    31  46   3   1  67.39  1.0       lbw     2  v Sri Lanka  Colombo (RPS)   

  Start Date  
0  18-Aug-08  
1  20-Aug-08  
2  24-Aug-08  
3  27-Aug-08  
4  29-Aug-08  
In [6]:
# Count the missing values in every column of the dataset.
missing_per_column = data.isna().sum()
print(missing_per_column)
Runs          0
BF            0
4s            0
6s            0
SR            0
Pos           0
Dismissal     0
Inns          0
Opposition    0
Ground        0
Start Date    0
dtype: int64
In [7]:
# Sum of the Runs column over every row in the dataset.
total_runs = data["Runs"].sum()
total_runs
Out[7]:
6184
In [8]:
# Arithmetic mean of the Runs column (one row per recorded innings).
average_runs = data["Runs"].mean()
average_runs
Out[8]:
46.84848484848485
In [9]:
# Line chart of runs, plotted against the DataFrame's row index.
matches = data.index
figure = px.line(
    data_frame=data,
    x=matches,
    y="Runs",
    title="Runs Scored by Virat Kohli",
)

figure.show()
020406080100120020406080100120140160
Runs Scored by Virat KohliindexRuns
plotly-logomark
In [10]:
# Batting positions: number of rows (innings) recorded at each position.
#
# The numeric position codes 1-7 are replaced by text labels. The mapping is
# written to be idempotent: values that are already strings pass through
# unchanged, so re-running this cell does not wipe the column to NaN (which a
# plain dict-based .map() does on its second run, because the labels are not
# keys of the dict). Numeric codes outside 1-7 still become NaN, as before.
position_labels = {float(n): f"Batting At {n}" for n in range(1, 8)}
data["Pos"] = data["Pos"].map(
    lambda pos: pos if isinstance(pos, str) else position_labels.get(pos, np.nan)
)

# value_counts() sorts in descending order and ignores NaN entries.
Pos = data["Pos"].value_counts()
label = Pos.index
counts = Pos.values
colors = ['gold','lightgreen', "pink", "blue", "skyblue", "cyan", "orange"]

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Number of Matches At Different Batting Positions')
# Show the raw count on each slice; label and percentage appear on hover.
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=30,
                  marker=dict(colors=colors, line=dict(color='black', width=3)))
fig.show()
912833331
Batting At 3Batting At 4Batting At 2Batting At 1Batting At 7Batting At 5batting At 6Number of Matches At Different Batting Positions
plotly-logomark
In [26]:
# Runs scored at each batting position.
#
# Aggregate to one row per position before plotting. Passing one label per
# innings relies on go.Pie to sum repeated labels, and the entries of
# marker.colors are matched to points by their position in the input arrays,
# so with only seven colors any position that first appears after the seventh
# row would not receive its intended color. With one value per position the
# seven colors line up with the seven slices.
runs_by_position = (
    data.groupby("Pos")["Runs"]
    .sum()
    .sort_values(ascending=False)  # largest slice first, like value_counts()
)
label = runs_by_position.index
counts = runs_by_position.values
colors = ['gold','lightgreen', "pink", "blue", "skyblue", "cyan", "orange"]

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Runs By Virat Kohli At Different Batting Positions')
# Show the run total on each slice; label and percentage appear on hover.
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=30,
                  marker=dict(colors=colors, line=dict(color='black', width=3)))
fig.show()
44771370123110513023
Batting At 3Batting At 4Batting At 5Batting At 1Batting At 2Batting At 7batting At 6Runs By Virat Kohli At Different Batting Positions
plotly-logomark
In [27]:
# Keep only the rows with a score of 100 or more, then chart those scores
# against the Inns column, colored by the runs scored.
centuries = data[data["Runs"] >= 100]
figure = px.bar(
    centuries,
    x="Inns",
    y="Runs",
    color="Runs",
    title="Centuries By Virat Kohli in First Innings Vs. Second Innings",
)
figure.show()
0.511.522.5020040060080010001200140016001800
100110120130140150RunsCenturies By Virat Kohli in First Innings Vs. Second InningsInnsRuns
plotly-logomark
In [36]:
# Pie chart of how often each value of the Dismissal column occurs.
dismissal = data["Dismissal"].value_counts()
label, counts = dismissal.index, dismissal.values
colors = ["gold", "lightgreen", "pink", "blue", "skyblue", "cyan", "orange"]

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
# Slice text shows the raw count; label and percentage appear on hover.
fig.update_traces(
    hoverinfo="label+percent",
    textinfo="value",
    textfont_size=30,
    marker=dict(
        colors=colors,
        line=dict(color="black", width=3),
    ),
)
fig.update_layout(title_text="Dismissals of Virat Kohli")
fig.show()
84171010911
caughtnot outrun outbowledlbwstumpedhit wicketDismissals of Virat Kohli
plotly-logomark
In [29]:
# Bar chart of runs against each opposition, colored by the runs scored.
figure = px.bar(
    data,
    x="Opposition",
    y="Runs",
    color="Runs",
    title="Most Runs Against Teams",
)
figure.show()
v Sri Lankav Australiav Bangladeshv South Africav Zimbabwev New Zealandv Irelandv Netherlandsv West Indiesv Pakistanv England02004006008001000
020406080100120140RunsMost Runs Against TeamsOppositionRuns
plotly-logomark
In [30]:
# Bar chart of the century scores (rows of `centuries`) per opposition,
# colored by the runs scored.
figure = px.bar(
    centuries,
    x="Opposition",
    y="Runs",
    color="Runs",
    title="Most Centuries Against Teams",
)
figure.show()
v Sri Lankav Bangladeshv Australiav New Zealandv Englandv West Indiesv Zimbabwev South Africa0100200300400500
100110120130140150RunsMost Centuries Against TeamsOppositionRuns
plotly-logomark
In [31]:
# Select the rows whose strike rate (SR) is at least 120 and print them.
strike_rate = data[data["SR"] >= 120]
print(strike_rate)
     Runs  BF  4s  6s      SR           Pos Dismissal  Inns     Opposition  \
8      27  19   4   0  142.10  Batting At 7    bowled     1    v Sri Lanka   
32    100  83   8   2  120.48  Batting At 4   not out     1   v Bangladesh   
56     23  11   3   0  209.09  batting At 6   not out     1  v West Indies   
76     43  34   4   1  126.47  Batting At 3    caught     1      v England   
78    102  83  13   2  122.89  Batting At 3    caught     1  v West Indies   
83    100  52   8   7  192.30  Batting At 3   not out     2    v Australia   
85    115  66  18   1  174.24  Batting At 3   not out     2    v Australia   
93     78  65   7   2  120.00  Batting At 3    caught     2  v New Zealand   
130     8   5   2   0  160.00  Batting At 3    caught     1      v England   

            Ground Start Date  
8           Rajkot  15-Dec-09  
32           Dhaka  19-Feb-11  
56          Indore   8-Dec-11  
76      Birmingham  23-Jun-13  
78   Port of Spain   5-Jul-13  
83          Jaipur  16-Oct-13  
85          Nagpur  30-Oct-13  
93        Hamilton  22-Jan-14  
130        Cuttack  19-Jan-17  
In [32]:
# Bar chart of the high strike rates (rows of `strike_rate`) against the
# innings number, colored by strike rate.
# The innings column is named "Inns" in this dataset; indexing with
# "Innings" raises KeyError.
figure = px.bar(strike_rate, x = strike_rate["Inns"], 
                y = strike_rate["SR"], 
                color = strike_rate["SR"],
            title="Virat Kohli's High Strike Rates in First Innings Vs. Second Innings")
figure.show()
0.511.522.50100200300400500600700800900
120140160180200SRVirat Kohli's High Strike Rates in First Innings Vs. Second InningsInnsSR
plotly-logomark
In [33]:
# Scatter plot of runs against fours, with marker size taken from the strike
# rate column and an OLS trendline overlaid.
figure = px.scatter(
    data,
    x="Runs",
    y="4s",
    size="SR",
    trendline="ols",
    title="Relationship Between Runs Scored and Fours",
)
figure.show()
02040608010012014016005101520
Relationship Between Runs Scored and FoursRuns4s
plotly-logomark
In [34]:
# Scatter plot of runs against sixes, with marker size taken from the strike
# rate column and an OLS trendline overlaid.
figure = px.scatter(
    data,
    x="Runs",
    y="6s",
    size="SR",
    trendline="ols",
    title="Relationship Between Runs Scored and Sixes",
)
figure.show()
02040608010012014016001234567
Relationship Between Runs Scored and SixesRuns6s
plotly-logomark